Construct Single-Hierarchical P/NBD Model for Online Retail Transaction Data

Author

Mick Cooney

Published

February 16, 2024

In this workbook we construct the single-hierarchical P/NBD models on the online retail transaction data with the longer timeframe.

1 Load and Construct Datasets

Having developed the P/NBD model on our synthetic datasets, we now move on to modelling real-life data.

1.1 Load Online Retail Data

We now want to load the online retail transaction data.

Code
# Per-customer cohort data: cohort quarter/month, first transaction date and
# total transaction count for each of the 5,852 online-retail customers.
customer_cohortdata_tbl <- read_rds("data/onlineretail_cohort_tbl.rds")
customer_cohortdata_tbl |> glimpse()
Rows: 5,852
Columns: 5
$ customer_id     <chr> "12346", "12347", "12348", "12349", "12350", "12351", …
$ cohort_qtr      <chr> "2010 Q1", "2010 Q4", "2010 Q3", "2010 Q2", "2011 Q1",…
$ cohort_ym       <chr> "2010 03", "2010 10", "2010 09", "2010 04", "2011 02",…
$ first_tnx_date  <date> 2010-03-02, 2010-10-31, 2010-09-27, 2010-04-29, 2011-…
$ total_tnx_count <int> 3, 8, 5, 3, 1, 1, 9, 2, 1, 2, 6, 2, 5, 10, 6, 4, 10, 2…
Code
# Transaction-level data: one row per invoice with timestamp, customer id and
# transaction amount.
customer_transactions_tbl <- read_rds("data/onlineretail_transactions_tbl.rds")
customer_transactions_tbl |> glimpse()
Rows: 36,658
Columns: 5
$ tnx_timestamp <dttm> 2009-12-01 07:45:00, 2009-12-01 07:45:59, 2009-12-01 09…
$ orig_id       <chr> "13085", "13085", "13078", "15362", "18102", "12682", "1…
$ customer_id   <fct> 13085, 13085, 13078, 15362, 18102, 12682, 18087, 18087, …
$ invoice_id    <chr> "489434", "489435", "489436", "489437", "489438", "48943…
$ tnx_amount    <dbl> 505.30, 145.80, 630.33, 310.75, 2286.24, 372.30, 50.40, …
Code
# Pre-selected subset of customer ids (a factor) used to reduce computation in
# the assessment simulations.
customer_subset_id <- read_rds("data/onlineretail_customer_subset_ids.rds")
customer_subset_id |> glimpse()
 Factor w/ 5852 levels "13085","13078",..: 2 3 8 10 14 16 17 21 27 30 ...

1.2 Load Derived Data

Code
# Pre-computed summary statistics and the fit/validation observation splits.
customer_summarystats_tbl <- read_rds("data/onlineretail_customer_summarystats_tbl.rds")

obs_fitdata_tbl   <- read_rds("data/onlineretail_obs_fitdata_tbl.rds")
obs_validdata_tbl <- read_rds("data/onlineretail_obs_validdata_tbl.rds")

# Rename tnx_count to x to match the input name expected by the P/NBD Stan code.
customer_fit_stats_tbl <- obs_fitdata_tbl |>
  rename(x = tnx_count)

1.3 Load Subset Data

We also want to construct our data subsets for the purposes of speeding up our valuations.

Code
# Restrict the fit-period observations to the 1,000-customer subset.
customer_fit_subset_tbl <- obs_fitdata_tbl |>
  filter(customer_id %in% customer_subset_id)

customer_fit_subset_tbl |> glimpse()
Rows: 1,000
Columns: 6
$ customer_id    <fct> 13078, 15362, 14110, 13758, 17592, 13767, 17238, 17700,…
$ first_tnx_date <dttm> 2009-12-01 09:05:59, 2009-12-01 09:08:00, 2009-12-01 0…
$ last_tnx_date  <dttm> 2010-11-30 13:17:59, 2010-09-17 10:36:59, 2010-09-30 1…
$ tnx_count      <dbl> 30, 1, 20, 6, 0, 35, 14, 9, 4, 9, 78, 2, 17, 5, 22, 2, …
$ t_x            <dbl> 52.02500, 41.43740, 43.32966, 51.72589, 0.00000, 52.034…
$ T_cal          <dbl> 52.08869, 52.08849, 52.08433, 52.08274, 52.07847, 52.07…
Code
# Restrict the validation-period observations to the same customer subset.
customer_valid_subset_tbl <- obs_validdata_tbl |>
  filter(customer_id %in% customer_subset_id)

customer_valid_subset_tbl |> glimpse()
Rows: 1,000
Columns: 3
$ customer_id       <fct> 13078, 15362, 14110, 13758, 17592, 13767, 17238, 177…
$ tnx_count         <dbl> 26, 0, 12, 7, 0, 37, 17, 4, 4, 13, 48, 0, 13, 3, 16,…
$ tnx_last_interval <dbl> 52.950000, NA, 52.931151, 51.780853, NA, 53.089286, …

We now use these datasets to set the start and end dates for our various validation methods.

Code
# Start/end dates for the fit and validation windows, shared across workbooks.
dates_lst <- read_rds("data/onlineretail_simulation_dates.rds")

use_fit_start_date <- dates_lst$use_fit_start_date
use_fit_end_date   <- dates_lst$use_fit_end_date

use_valid_start_date <- dates_lst$use_valid_start_date
use_valid_end_date   <- dates_lst$use_valid_end_date

We now split out the transaction data into fit and validation datasets.

Code
# Keep only the subset customers' transactions that fall inside the
# fitting window [use_fit_start_date, use_fit_end_date].
customer_fit_transactions_tbl <- customer_transactions_tbl |>
  filter(customer_id %in% customer_subset_id) |>
  filter(tnx_timestamp >= use_fit_start_date) |>
  filter(tnx_timestamp <= use_fit_end_date)

customer_fit_transactions_tbl |> glimpse()
Rows: 4,164
Columns: 5
$ tnx_timestamp <dttm> 2009-12-01 09:05:59, 2009-12-01 09:08:00, 2009-12-01 09…
$ orig_id       <chr> "13078", "15362", "14110", "13758", "17592", "13767", "1…
$ customer_id   <fct> 13078, 15362, 14110, 13758, 17592, 13767, 17238, 17700, …
$ invoice_id    <chr> "489436", "489437", "489443", "489446", "489462", "48946…
$ tnx_amount    <dbl> 630.33, 310.75, 285.06, 996.10, 148.30, 1197.80, 251.10,…
Code
# Keep only the subset customers' transactions that fall inside the
# validation window [use_valid_start_date, use_valid_end_date].
customer_valid_transactions_tbl <- customer_transactions_tbl |>
  filter(
    customer_id %in% customer_subset_id,
    tnx_timestamp >= use_valid_start_date,
    tnx_timestamp <= use_valid_end_date
    )
  
customer_valid_transactions_tbl |> glimpse()
Rows: 3,422
Columns: 5
$ tnx_timestamp <dttm> 2010-12-01 09:32:00, 2010-12-01 09:59:00, 2010-12-01 10…
$ orig_id       <chr> "15291", "16250", "12431", "13408", "13767", "12791", "1…
$ customer_id   <fct> 15291, 16250, 12431, 13408, 13767, 12791, 17908, 16583, …
$ invoice_id    <chr> "536376", "536388", "536389", "536394", "536395", "53640…
$ tnx_amount    <dbl> 657.60, 452.28, 716.50, 2049.36, 1015.76, 355.20, 486.56…

Finally, we want to extract the first transaction for each customer, so we can add this data to assess our models.

Code
# Earliest fit-period transaction per customer (ties kept, hence 1,002 rows
# for 1,000 customers).
customer_initial_tnx_tbl <- customer_fit_transactions_tbl |>
  slice_min(n = 1, order_by = tnx_timestamp, by = customer_id)

customer_initial_tnx_tbl |> glimpse()
Rows: 1,002
Columns: 5
$ tnx_timestamp <dttm> 2009-12-01 09:05:59, 2009-12-01 09:08:00, 2009-12-01 09…
$ orig_id       <chr> "13078", "15362", "14110", "13758", "17592", "13767", "1…
$ customer_id   <fct> 13078, 15362, 14110, 13758, 17592, 13767, 17238, 17700, …
$ invoice_id    <chr> "489436", "489437", "489443", "489446", "489462", "48946…
$ tnx_amount    <dbl> 630.33, 310.75, 285.06, 996.10, 148.30, 1197.80, 251.10,…

We now expand out these initial transactions so that we can append them to our simulations.

Code
# Replicate each customer's initial transaction across all simulation draws so
# it can be prepended to every simulated transaction path.
# NOTE(review): n_sim is assumed to be set earlier in the workbook (the output
# implies n_sim = 2000) — confirm against the setup chunk.
sim_init_tbl <- customer_initial_tnx_tbl |>
  transmute(
    customer_id,
    draw_id       = list(1:n_sim),
    tnx_timestamp,
    tnx_amount
    ) |>
  unnest(draw_id)

sim_init_tbl |> glimpse()
Rows: 2,004,000
Columns: 4
$ customer_id   <fct> 13078, 13078, 13078, 13078, 13078, 13078, 13078, 13078, …
$ draw_id       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
$ tnx_timestamp <dttm> 2009-12-01 09:05:59, 2009-12-01 09:05:59, 2009-12-01 09…
$ tnx_amount    <dbl> 630.33, 630.33, 630.33, 630.33, 630.33, 630.33, 630.33, …

Before we start on that, we set a few parameters for the workbook to organise our Stan code.

Code
# Directories for compiled Stan model binaries and Stan source files.
stan_modeldir <- "stan_models"
stan_codedir  <- "stan_code"

2 Fit First Hierarchical Lambda Model

Our first hierarchical model puts a hierarchical prior around the mean of our population \(\lambda\) - lambda_mn.

Once again we use a Gamma prior for it.

2.1 Compile and Fit Stan Model

We now compile this model using CmdStanR.

Code
# Compile the hierarchical-lambda P/NBD Stan model; pedantic mode enables
# extra compile-time checks on the Stan program.
pnbd_onehierlambda_stanmodel <- cmdstan_model(
  "stan_code/pnbd_onehier_lambda.stan",
  include_paths =   stan_codedir,
  pedantic      =           TRUE,
  dir           =  stan_modeldir
  )

We then use this compiled model with our data to produce a fit of the data.

Code
# Model label, file prefix and a fresh sampler seed for this fit.
stan_modelname <- "pnbd_onlineretail_onehierlambda1"
stanfit_prefix <- str_c("fit_", stan_modelname)
stanfit_seed   <- stanfit_seed + 1

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")


# Stan input: per-customer statistics (x, t_x, T_cal) plus fixed
# hyperparameters for the hierarchical lambda_mn prior and the mu prior.
# (Removed the trailing comma after the final argument — rlang dots tolerate
# it, but it is fragile style and errors in base R function calls.)
stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    hier_lambda_mn_p1 = 0.25,
    hier_lambda_mn_p2 = 1,

    lambda_cv = 1.00,

    mu_mn     = 0.10,
    mu_cv     = 1.00
    )

# Fit-or-load cache: run the sampler only if no saved fit object exists.
if(!file_exists(stanfit_object_file)) {
  pnbd_onlineretail_onehierlambda1_stanfit <- pnbd_onehierlambda_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  # Persist the fit so subsequent renders can skip sampling.
  pnbd_onlineretail_onehierlambda1_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  pnbd_onlineretail_onehierlambda1_stanfit <- read_rds(stanfit_object_file)
}

# Summary of the leading parameters (lp__, lambda_mn, per-customer lambdas).
pnbd_onlineretail_onehierlambda1_stanfit$print()
  variable      mean    median    sd   mad        q5       q95 rhat ess_bulk
 lp__      -61826.27 -61825.50 71.88 69.61 -61947.51 -61705.80 1.02      544
 lambda_mn      0.13      0.13  0.00  0.00      0.12      0.13 1.00     1166
 lambda[1]      0.32      0.30  0.14  0.14      0.13      0.58 1.00     1940
 lambda[2]      0.51      0.51  0.09  0.09      0.37      0.67 1.00     2019
 lambda[3]      0.03      0.03  0.02  0.02      0.01      0.08 1.00     2130
 lambda[4]      1.43      1.42  0.15  0.15      1.19      1.68 1.01     2062
 lambda[5]      0.33      0.33  0.07  0.07      0.22      0.47 1.00     2441
 lambda[6]      0.25      0.25  0.07  0.07      0.15      0.36 1.00     2148
 lambda[7]      0.05      0.05  0.03  0.03      0.01      0.10 1.00     2125
 lambda[8]      0.38      0.37  0.09  0.09      0.24      0.53 1.00     1969
 ess_tail
     1028
     1458
     1304
     1466
      954
     1367
     1290
     1263
      932
     1250

 # showing 10 of 12720 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_onlineretail_onehierlambda1_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehierlambda1-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehierlambda1-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehierlambda1-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehierlambda1-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete, no problems detected.

2.2 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
# Trace plots for the hierarchical mean and a few individual lambda/mu
# parameters, as a visual check on chain mixing.
parameter_subset <- c(
  "lambda_mn",
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]",
  "mu[1]",     "mu[2]",     "mu[3]",     "mu[4]"
  )

pnbd_onlineretail_onehierlambda1_stanfit$draws(inc_warmup = FALSE) |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and Mu Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

We also check \(N_{eff}\) as a quick diagnostic of the fit.

Code
# Effective-sample-size ratios for all lambda and mu parameters.
pnbd_onlineretail_onehierlambda1_stanfit |>
  neff_ratio(pars = c("lambda", "mu")) |>
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
# Energy (E-BFMI) diagnostic histogram for the HMC sampler.
pnbd_onlineretail_onehierlambda1_stanfit |>
  nuts_params() |>
  mcmc_nuts_energy(binwidth = 50)

2.3 Assess the Model

As we intend to run the same logic to assess each of our models, we have combined all this logic into a single function run_model_assessment, to run the simulations and combine the datasets.

Code
# Attach customer_id factor levels so tidybayes maps Stan indices back to ids.
pnbd_stanfit <- pnbd_onlineretail_onehierlambda1_stanfit |>
  recover_types(customer_fit_stats_tbl)

# Simulate from the posterior and write assessment datasets to disk;
# the returned list holds file paths of the generated outputs.
pnbd_onlineretail_onehierlambda1_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_onlineretail_onehierlambda1",
  fit_end_dttm     = use_fit_end_date     |> as.POSIXct(),
  valid_start_dttm = use_valid_start_date |> as.POSIXct(),
  valid_end_dttm   = use_valid_end_date   |> as.POSIXct(),
  sim_seed         = 1110
  )

pnbd_onlineretail_onehierlambda1_assess_data_lst |> glimpse()
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_onlineretail_onehierlambda1_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_onlineretail_onehierlambda1_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_onlineretail_onehierlambda1_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_onlineretail_onehierlambda1_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_onlineretail_onehierlambda1_assess_valid_simstats_tbl.rds"

2.3.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
# Load the in-sample simulation files, prepend the replicated initial
# transactions, and order the rows for plotting.
simdata_tbl <- pnbd_onlineretail_onehierlambda1_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


# Compare observed fit-period transactions against the simulations.
assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it, which gives us some confidence that the model is valid for this dataset.

2.3.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
# Load the out-of-sample simulation files (no initial transactions needed in
# the validation period) and order the rows for plotting.
simdata_tbl <- pnbd_onlineretail_onehierlambda1_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


# Compare observed validation-period transactions against the simulations.
assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

As for our short time frame data, overall our model is working well.

3 Fit Second Hierarchical Lambda Model

In this model, we are going with a broadly similar model but we are instead using a different mean for our hierarchical prior.

3.1 Fit Stan Model

We now want to fit the model to our data using this alternative prior for lambda_mn.

Code
# Model label, file prefix and a fresh sampler seed for the second
# hierarchical-lambda fit.
stan_modelname <- "pnbd_onlineretail_onehierlambda2"
stanfit_prefix <- str_c("fit_", stan_modelname)
stanfit_seed   <- stanfit_seed + 1

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")


# Same data as the first model but with hier_lambda_mn_p1 = 0.50 (vs 0.25).
# (Removed the trailing comma after the final argument — rlang dots tolerate
# it, but it is fragile style and errors in base R function calls.)
stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    hier_lambda_mn_p1 = 0.50,
    hier_lambda_mn_p2 = 1,

    lambda_cv = 1.00,

    mu_mn     = 0.10,
    mu_cv     = 1.00
    )

# Fit-or-load cache: run the sampler only if no saved fit object exists.
if(!file_exists(stanfit_object_file)) {
  pnbd_onlineretail_onehierlambda2_stanfit <- pnbd_onehierlambda_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  # Persist the fit so subsequent renders can skip sampling.
  pnbd_onlineretail_onehierlambda2_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  pnbd_onlineretail_onehierlambda2_stanfit <- read_rds(stanfit_object_file)
}

# Summary of the leading parameters (lp__, lambda_mn, per-customer lambdas).
pnbd_onlineretail_onehierlambda2_stanfit$print()
  variable      mean    median    sd   mad        q5       q95 rhat ess_bulk
 lp__      -61822.08 -61819.40 69.60 69.24 -61934.94 -61710.90 1.00      657
 lambda_mn      0.13      0.13  0.00  0.00      0.12      0.13 1.00     1602
 lambda[1]      0.32      0.30  0.14  0.13      0.13      0.58 1.00     2759
 lambda[2]      0.52      0.51  0.09  0.09      0.37      0.68 1.00     3185
 lambda[3]      0.03      0.03  0.02  0.02      0.01      0.08 1.00     1720
 lambda[4]      1.42      1.42  0.15  0.15      1.18      1.69 1.00     2525
 lambda[5]      0.33      0.33  0.07  0.07      0.22      0.47 1.00     1864
 lambda[6]      0.25      0.24  0.06  0.06      0.15      0.36 1.00     2561
 lambda[7]      0.05      0.05  0.03  0.03      0.01      0.11 1.00     2539
 lambda[8]      0.38      0.37  0.09  0.08      0.24      0.53 1.00     2682
 ess_tail
     1318
     1263
     1494
     1530
     1163
     1272
     1506
     1341
     1336
     1726

 # showing 10 of 12720 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_onlineretail_onehierlambda2_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehierlambda2-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehierlambda2-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehierlambda2-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehierlambda2-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete, no problems detected.

3.2 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
# Trace plots for the hierarchical mean and a few individual lambda/mu
# parameters, as a visual check on chain mixing.
parameter_subset <- c(
  "lambda_mn",
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]",
  "mu[1]",     "mu[2]",     "mu[3]",     "mu[4]"
  )

pnbd_onlineretail_onehierlambda2_stanfit$draws(inc_warmup = FALSE) |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and Mu Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

We also check \(N_{eff}\) as a quick diagnostic of the fit.

Code
# Effective-sample-size ratios for all lambda and mu parameters.
pnbd_onlineretail_onehierlambda2_stanfit |>
  neff_ratio(pars = c("lambda", "mu")) |>
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
# Energy (E-BFMI) diagnostic histogram for the HMC sampler.
pnbd_onlineretail_onehierlambda2_stanfit |>
  nuts_params() |>
  mcmc_nuts_energy(binwidth = 50)

3.3 Assess the Model

As we intend to run the same logic to assess each of our models, we have combined all this logic into a single function run_model_assessment, to run the simulations and combine the datasets.

Code
# Attach customer_id factor levels so tidybayes maps Stan indices back to ids.
pnbd_stanfit <- pnbd_onlineretail_onehierlambda2_stanfit |>
  recover_types(customer_fit_stats_tbl)

# Simulate from the posterior and write assessment datasets to disk;
# the returned list holds file paths of the generated outputs.
pnbd_onlineretail_onehierlambda2_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_onlineretail_onehierlambda2",
  fit_end_dttm     = use_fit_end_date     |> as.POSIXct(),
  valid_start_dttm = use_valid_start_date |> as.POSIXct(),
  valid_end_dttm   = use_valid_end_date   |> as.POSIXct(),
  sim_seed         = 1120
  )
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_onlineretail_onehierlambda2_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_onlineretail_onehierlambda2_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_onlineretail_onehierlambda2_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_onlineretail_onehierlambda2_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_onlineretail_onehierlambda2_assess_valid_simstats_tbl.rds"

3.3.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
# Load the in-sample simulation files, prepend the replicated initial
# transactions, and order the rows for plotting.
simdata_tbl <- pnbd_onlineretail_onehierlambda2_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


# Compare observed fit-period transactions against the simulations.
assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it, which gives us some confidence that the model is valid for this dataset.

3.3.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
# Load the out-of-sample simulation files and order the rows for plotting.
simdata_tbl <- pnbd_onlineretail_onehierlambda2_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


# Compare observed validation-period transactions against the simulations.
assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

As for our short time frame data, overall our model is working well.

4 Fit First Hierarchical Mu Model

We now construct the same hierarchical model but based around mu_mn.

4.1 Compile and Fit Stan Model

We compile this model using CmdStanR.

Code
# Compile the hierarchical-mu P/NBD Stan model; pedantic mode enables extra
# compile-time checks on the Stan program.
pnbd_onehiermu_stanmodel <- cmdstan_model(
  "stan_code/pnbd_onehier_mu.stan",
  include_paths =   stan_codedir,
  pedantic      =           TRUE,
  dir           =  stan_modeldir
  )

We then use this compiled model with our data to produce a fit of the data.

Code
# Model label, file prefix and a fresh sampler seed for the first
# hierarchical-mu fit.
stan_modelname <- "pnbd_onlineretail_onehiermu1"
stanfit_prefix <- str_c("fit_", stan_modelname)
stanfit_seed   <- stanfit_seed + 1

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")


# Stan input: here mu_mn gets the hierarchical prior (hier_mu_mn_p1/p2) while
# lambda_mn is fixed.
stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    hier_mu_mn_p1 = 0.50,
    hier_mu_mn_p2 = 1.00,

    lambda_mn     = 0.25,
    lambda_cv     = 1.00,

    mu_cv         = 1.00

    )

# Fit-or-load cache: run the sampler only if no saved fit object exists.
if(!file_exists(stanfit_object_file)) {
  pnbd_onlineretail_onehiermu1_stanfit <- pnbd_onehiermu_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  # Persist the fit so subsequent renders can skip sampling.
  pnbd_onlineretail_onehiermu1_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  pnbd_onlineretail_onehiermu1_stanfit <- read_rds(stanfit_object_file)
}

# Summary of the leading parameters (lp__, mu_mn, per-customer lambdas).
pnbd_onlineretail_onehiermu1_stanfit$print()
  variable      mean    median    sd   mad        q5       q95 rhat ess_bulk
 lp__      -57192.15 -57193.05 72.68 70.65 -57312.81 -57072.87 1.00      667
 mu_mn          0.00      0.00  0.00  0.00      0.00      0.01 1.08       49
 lambda[1]      0.40      0.38  0.19  0.18      0.15      0.75 1.00     2202
 lambda[2]      0.55      0.55  0.10  0.10      0.40      0.73 1.00     2353
 lambda[3]      0.03      0.03  0.02  0.02      0.01      0.08 1.00     1684
 lambda[4]      1.52      1.51  0.16  0.17      1.27      1.79 1.00     2595
 lambda[5]      0.35      0.35  0.08  0.08      0.24      0.49 1.00     2253
 lambda[6]      0.27      0.26  0.07  0.07      0.16      0.39 1.00     2722
 lambda[7]      0.05      0.05  0.03  0.03      0.01      0.11 1.00     1487
 lambda[8]      0.39      0.38  0.09  0.09      0.26      0.55 1.00     2280
 ess_tail
     1095
      141
     1191
     1252
     1177
     1720
     1381
     1442
      962
     1542

 # showing 10 of 12720 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_onlineretail_onehiermu1_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehiermu1-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehiermu1-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehiermu1-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehiermu1-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

The following parameters had split R-hat greater than 1.05:
  mu_mn, beta
Such high values indicate incomplete mixing and biased estimation.
You should consider regularizating your model with additional prior information or a more effective parameterization.

Processing complete.

4.2 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
# Trace plots for the hierarchical mu mean and a few individual lambda/mu
# parameters, as a visual check on chain mixing.
parameter_subset <- c(
  "mu_mn",
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]",
  "mu[1]",     "mu[2]",     "mu[3]",     "mu[4]"
  )

pnbd_onlineretail_onehiermu1_stanfit$draws(inc_warmup = FALSE) |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and Mu Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

We also check \(N_{eff}\) as a quick diagnostic of the fit.

Code
# Effective-sample-size ratios for all lambda and mu parameters.
pnbd_onlineretail_onehiermu1_stanfit |>
  neff_ratio(pars = c("lambda", "mu")) |>
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
# Energy (E-BFMI) diagnostic histogram for the HMC sampler.
pnbd_onlineretail_onehiermu1_stanfit |>
  nuts_params() |>
  mcmc_nuts_energy(binwidth = 50)

4.3 Assess the Model

As we intend to run the same logic to assess each of our models, we have combined all this logic into a single function run_model_assessment, to run the simulations and combine the datasets.

Code
# Attach customer_id factor levels so tidybayes maps Stan indices back to ids.
pnbd_stanfit <- pnbd_onlineretail_onehiermu1_stanfit |>
  recover_types(customer_fit_stats_tbl)

# Simulate from the posterior and write assessment datasets to disk;
# the returned list holds file paths of the generated outputs.
pnbd_onlineretail_onehiermu1_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_onlineretail_onehiermu1",
  fit_end_dttm     = use_fit_end_date     |> as.POSIXct(),
  valid_start_dttm = use_valid_start_date |> as.POSIXct(),
  valid_end_dttm   = use_valid_end_date   |> as.POSIXct(),
  sim_seed         = 1130
  )
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_onlineretail_onehiermu1_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_onlineretail_onehiermu1_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_onlineretail_onehiermu1_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_onlineretail_onehiermu1_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_onlineretail_onehiermu1_assess_valid_simstats_tbl.rds"

4.3.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
# Load the in-sample simulation files, prepend the replicated initial
# transactions, and order the rows for plotting.
simdata_tbl <- pnbd_onlineretail_onehiermu1_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


# Compare observed fit-period transactions against the simulations.
assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it, which gives us some confidence that the model is valid for this dataset.

4.3.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
# Load the out-of-sample simulation files and order the rows for plotting.
simdata_tbl <- pnbd_onlineretail_onehiermu1_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


# Compare observed validation-period transactions against the simulations.
assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

As for our short time frame data, overall our model is working well.

5 Fit Second Hierarchical Mu Model

In this model, we are going with a broadly similar model but we are instead using a different mean for our hierarchical prior.

5.1 Fit Stan Model

We now want to fit the model to our data using this alternative prior for mu_mn.

Code
# Model label, file prefix and a fresh sampler seed for the second
# hierarchical-mu fit.
stan_modelname <- "pnbd_onlineretail_onehiermu2"
stanfit_prefix <- str_c("fit_", stan_modelname)
stanfit_seed   <- stanfit_seed + 1

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")


# Same data as the first mu model but with hier_mu_mn_p1 = 0.25 (vs 0.50).
stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    hier_mu_mn_p1 = 0.25,
    hier_mu_mn_p2 = 1.00,

    lambda_mn     = 0.25,
    lambda_cv     = 1.00,

    mu_cv         = 1.00
    )

# Fit-or-load cache: run the sampler only if no saved fit object exists.
if(!file_exists(stanfit_object_file)) {
  pnbd_onlineretail_onehiermu2_stanfit <- pnbd_onehiermu_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  # Persist the fit so subsequent renders can skip sampling.
  pnbd_onlineretail_onehiermu2_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  pnbd_onlineretail_onehiermu2_stanfit <- read_rds(stanfit_object_file)
}

# Summary of the leading parameters (lp__, mu_mn, per-customer lambdas).
pnbd_onlineretail_onehiermu2_stanfit$print()
  variable      mean    median    sd   mad        q5       q95 rhat ess_bulk
 lp__      -57191.67 -57190.40 68.70 69.53 -57305.21 -57079.40 1.01      759
 mu_mn          0.00      0.00  0.00  0.00      0.00      0.01 1.05       65
 lambda[1]      0.40      0.37  0.18  0.17      0.16      0.73 1.00     2496
 lambda[2]      0.55      0.55  0.10  0.10      0.40      0.73 1.00     2295
 lambda[3]      0.04      0.03  0.03  0.02      0.01      0.08 1.00     1839
 lambda[4]      1.52      1.52  0.16  0.17      1.26      1.80 1.00     2119
 lambda[5]      0.36      0.35  0.08  0.08      0.24      0.49 1.00     2834
 lambda[6]      0.27      0.26  0.07  0.07      0.16      0.39 1.00     2426
 lambda[7]      0.05      0.05  0.03  0.03      0.01      0.11 1.00     2685
 lambda[8]      0.39      0.38  0.09  0.09      0.26      0.55 1.00     3023
 ess_tail
     1204
       99
     1436
     1305
      889
     1040
     1402
     1239
     1087
     1430

 # showing 10 of 12720 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_onlineretail_onehiermu2_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehiermu2-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehiermu2-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehiermu2-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_onehiermu2-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete, no problems detected.

5.2 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
# Subset of parameters to inspect: the hierarchical mean plus the first four
# customer-level lambda and mu values
parameter_subset <- c(
  "mu_mn",
  sprintf("lambda[%d]", 1:4),
  sprintf("mu[%d]", 1:4)
  )

# Traceplot of the post-warmup draws for the chosen parameter subset
postwarmup_draws <- pnbd_onlineretail_onehiermu2_stanfit$draws(inc_warmup = FALSE)

postwarmup_draws |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    title = "Traceplot of Sample of Lambda and Mu Values",
    x     = "Iteration",
    y     = "Value"
    ) +
  theme(axis.text.x = element_text(size = 10))

We also check \(N_{eff}\) as a quick diagnostic of the fit.

Code
# Effective-sample-size ratios for the customer-level parameters
neff_ratios <- neff_ratio(
  pnbd_onlineretail_onehiermu2_stanfit,
  pars = c("lambda", "mu")
  )

neff_ratios |>
  mcmc_neff() +
  ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
# Energy (E-BFMI) diagnostic for the HMC sampler
sampler_diagnostics <- nuts_params(pnbd_onlineretail_onehiermu2_stanfit)

sampler_diagnostics |>
  mcmc_nuts_energy(binwidth = 50)

5.3 Assess the Model

As we intend to run the same assessment logic on each of our models, we have combined it all into a single function, run_model_assessment, which runs the simulations and combines the resulting datasets.

Code
# Re-attach the factor levels from the fit data to the stanfit object so the
# draws can be indexed by customer_id
pnbd_stanfit <- recover_types(
  pnbd_onlineretail_onehiermu2_stanfit,
  customer_fit_stats_tbl
  )

# Run the simulation-based assessment over both the fit and valid periods
pnbd_onlineretail_onehiermu2_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_onlineretail_onehiermu2",
  fit_end_dttm     = as.POSIXct(use_fit_end_date),
  valid_start_dttm = as.POSIXct(use_valid_start_date),
  valid_end_dttm   = as.POSIXct(use_valid_end_date),
  sim_seed         = 1140
  )

pnbd_onlineretail_onehiermu2_assess_data_lst |> glimpse()
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_onlineretail_onehiermu2_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_onlineretail_onehiermu2_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_onlineretail_onehiermu2_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_onlineretail_onehiermu2_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_onlineretail_onehiermu2_assess_valid_simstats_tbl.rds"

5.3.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
# Load the index of in-sample simulation files produced by the assessment run
fit_index_tbl <- pnbd_onlineretail_onehiermu2_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds()

# Read all simulated transactions, append the initial transactions and sort
simdata_tbl <- fit_index_tbl |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


# Compare the observed in-sample transactions against the simulations
assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it. As this is real-world transaction data rather than one of our synthetic datasets, capturing the in-sample behaviour this well gives us some confidence that our model is valid.

5.3.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
# Load the index of out-of-sample simulation files from the assessment run
valid_index_tbl <- pnbd_onlineretail_onehiermu2_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds()

# Read all simulated out-of-sample transactions and sort
simdata_tbl <- valid_index_tbl |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


# Compare the observed out-of-sample transactions against the simulations
assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

As with our shorter time-frame data, our model overall appears to be working well on the out-of-sample period.

6 Compare Model Outputs

We have looked at each of the models individually, but it is also worth looking at each of the models as a group.

We now want to combine both the fit and valid transaction sets to calculate the summary statistics for both.

Code
# Stack the fit and valid transactions, tagging each row with its period
combined_tnx_tbl <- list(
    fit   = customer_fit_transactions_tbl,
    valid = customer_valid_transactions_tbl
    ) |>
  bind_rows(.id = "assess_type")

# Observed summary statistics per period, reshaped to long format so they
# can be overlaid on the simulation distributions
obs_summstats_tbl <- combined_tnx_tbl |>
  group_by(assess_type) |>
  calculate_transaction_summary_statistics() |>
  pivot_longer(
    cols      = !assess_type,
    names_to  = "label",
    values_to = "obs_value"
    )

obs_summstats_tbl |> glimpse()
Rows: 16
Columns: 3
$ assess_type <chr> "fit", "fit", "fit", "fit", "fit", "fit", "fit", "fit", "v…
$ label       <chr> "p10", "p25", "p50", "p75", "p90", "p99", "total_count", "…
$ obs_value   <dbl> 1.000000, 1.000000, 2.000000, 5.000000, 9.000000, 27.01000…
Code
# Find the assessment index files on disk for the "fixed" and "one" variants
assess_index_files <- dir_ls("data", regexp = "pnbd_onlineretail_(fixed|one).*_assess_.*index")

# Derive the model label and assessment period from each file path, then
# load the simulated transactions for every index file
model_assess_transactions_tbl <- assess_index_files |>
  enframe(name = NULL, value = "file_path") |>
  mutate(
    model_label = str_replace(file_path, "data/pnbd_onlineretail_(.*?)_assess_.*", "\\1"),
    assess_type = if_else(str_detect(file_path, "_assess_fit_"), "fit", "valid")
    ) |>
  mutate(
    assess_data = map(
      file_path, construct_model_assessment_data,
      .progress = "construct_assess_data"
      )
    ) |>
  select(model_label, assess_type, assess_data) |>
  unnest(assess_data)

model_assess_transactions_tbl |> glimpse()
Rows: 115,975,227
Columns: 6
$ model_label   <chr> "fixed1", "fixed1", "fixed1", "fixed1", "fixed1", "fixed…
$ assess_type   <chr> "fit", "fit", "fit", "fit", "fit", "fit", "fit", "fit", …
$ customer_id   <fct> 13078, 13078, 13078, 13078, 13078, 13078, 13078, 13078, …
$ draw_id       <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ tnx_timestamp <dttm> 2009-12-01 12:50:44, 2010-01-10 17:25:42, 2010-01-20 11…
$ tnx_amount    <dbl> 113.06, 154.46, 130.47, 194.81, 104.02, 99.04, 83.73, 23…

We now want to calculate the transaction statistics on this full dataset, for each separate draw.

Code
# Summary statistics of the simulated transactions, computed separately for
# each model, assessment period and posterior draw
model_assess_tbl <- model_assess_transactions_tbl |>
  group_by(model_label, assess_type, draw_id) |>
  calculate_transaction_summary_statistics()

model_assess_tbl |> glimpse()
Rows: 32,000
Columns: 11
$ model_label <chr> "fixed1", "fixed1", "fixed1", "fixed1", "fixed1", "fixed1"…
$ assess_type <chr> "fit", "fit", "fit", "fit", "fit", "fit", "fit", "fit", "f…
$ draw_id     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
$ p10         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ p25         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ p50         <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
$ p75         <dbl> 5.00, 5.00, 6.00, 5.00, 5.00, 5.00, 5.00, 5.00, 6.00, 6.00…
$ p90         <dbl> 9.0, 9.0, 10.0, 10.0, 10.0, 11.0, 9.7, 10.0, 10.0, 10.0, 1…
$ p99         <dbl> 27.58, 26.76, 26.56, 26.33, 24.69, 25.49, 25.00, 26.02, 28…
$ total_count <int> 2812, 2692, 2822, 2680, 2747, 2891, 2847, 2792, 2834, 2817…
$ mean_count  <dbl> 4.373250, 4.391517, 4.529695, 4.542373, 4.346519, 4.677994…

We now combine all this data to create a number of different comparison plots for the various summary statistics.

Code
#| echo: TRUE

# Compare total transaction counts across all fitted models
# (fixed "#!" -> "#|": Quarto/knitr chunk options use the hash-pipe comment)
create_multiple_model_assessment_plot(
  obs_summstats_tbl, model_assess_tbl,
  "total_count", "Total Transactions"
  )

Code
# Compare the average transaction count per customer across models
create_multiple_model_assessment_plot(
  obs_summstats_tbl, model_assess_tbl,
  "mean_count", "Average Transactions per Customer"
  )

Code
# Compare the 99th percentile of transaction counts across models
create_multiple_model_assessment_plot(
  obs_summstats_tbl, model_assess_tbl,
  "p99", "99th Percentile Count"
  )

6.1 Write Assessment Data to Disk

We now want to save the assessment data to disk.

Code
model_assess_tbl |> write_rds("data/assess_data_pnbd_onlineretail_onehier_tbl.rds")

R Environment

Code
# Widen the console so session_info() prints without wrapping
options(width = 120L)
sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.3.1 (2023-06-16)
 os       Ubuntu 22.04.3 LTS
 system   x86_64, linux-gnu
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       Europe/Dublin
 date     2024-02-16
 pandoc   3.1.1 @ /usr/local/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
 package        * version   date (UTC) lib source
 abind            1.4-5     2016-07-21 [1] RSPM (R 4.3.0)
 arrayhelpers     1.1-0     2020-02-04 [1] RSPM (R 4.3.0)
 backports        1.4.1     2021-12-13 [1] RSPM (R 4.3.0)
 base64enc        0.1-3     2015-07-28 [1] RSPM (R 4.3.0)
 bayesplot      * 1.10.0    2022-11-16 [1] RSPM (R 4.3.0)
 bit              4.0.5     2022-11-15 [1] RSPM (R 4.3.0)
 bit64            4.0.5     2020-08-30 [1] RSPM (R 4.3.0)
 bridgesampling   1.1-2     2021-04-16 [1] RSPM (R 4.3.0)
 brms           * 2.20.4    2023-09-25 [1] RSPM (R 4.3.0)
 Brobdingnag      1.2-9     2022-10-19 [1] RSPM (R 4.3.0)
 cachem           1.0.8     2023-05-01 [1] RSPM (R 4.3.0)
 callr            3.7.3     2022-11-02 [1] RSPM (R 4.3.0)
 checkmate        2.3.0     2023-10-25 [1] RSPM (R 4.3.0)
 cli              3.6.1     2023-03-23 [1] RSPM (R 4.3.0)
 cmdstanr       * 0.7.0     2023-12-13 [1] Github (stan-dev/cmdstanr@7e10703)
 coda             0.19-4    2020-09-30 [1] RSPM (R 4.3.0)
 codetools        0.2-19    2023-02-01 [2] CRAN (R 4.3.1)
 colorspace       2.1-0     2023-01-23 [1] RSPM (R 4.3.0)
 colourpicker     1.3.0     2023-08-21 [1] RSPM (R 4.3.0)
 conflicted     * 1.2.0     2023-02-01 [1] RSPM (R 4.3.0)
 cowplot        * 1.1.1     2020-12-30 [1] RSPM (R 4.3.0)
 crayon           1.5.2     2022-09-29 [1] RSPM (R 4.3.0)
 crosstalk        1.2.0     2021-11-04 [1] RSPM (R 4.3.0)
 curl             5.1.0     2023-10-02 [1] RSPM (R 4.3.0)
 digest           0.6.33    2023-07-07 [1] RSPM (R 4.3.0)
 directlabels   * 2023.8.25 2023-09-01 [1] RSPM (R 4.3.0)
 distributional   0.3.2     2023-03-22 [1] RSPM (R 4.3.0)
 dplyr          * 1.1.3     2023-09-03 [1] RSPM (R 4.3.0)
 DT               0.30      2023-10-05 [1] RSPM (R 4.3.0)
 dygraphs         1.1.1.6   2018-07-11 [1] RSPM (R 4.3.0)
 ellipsis         0.3.2     2021-04-29 [1] RSPM (R 4.3.0)
 evaluate         0.22      2023-09-29 [1] RSPM (R 4.3.0)
 fansi            1.0.5     2023-10-08 [1] RSPM (R 4.3.0)
 farver           2.1.1     2022-07-06 [1] RSPM (R 4.3.0)
 fastmap          1.1.1     2023-02-24 [1] RSPM (R 4.3.0)
 forcats        * 1.0.0     2023-01-29 [1] RSPM (R 4.3.0)
 fs             * 1.6.3     2023-07-20 [1] RSPM (R 4.3.0)
 furrr          * 0.3.1     2022-08-15 [1] RSPM (R 4.3.0)
 future         * 1.33.0    2023-07-01 [1] RSPM (R 4.3.0)
 generics         0.1.3     2022-07-05 [1] RSPM (R 4.3.0)
 ggdist           3.3.0     2023-05-13 [1] RSPM (R 4.3.0)
 ggplot2        * 3.4.4     2023-10-12 [1] RSPM (R 4.3.0)
 globals          0.16.2    2022-11-21 [1] RSPM (R 4.3.0)
 glue           * 1.6.2     2022-02-24 [1] RSPM (R 4.3.0)
 gridExtra        2.3       2017-09-09 [1] RSPM (R 4.3.0)
 gtable           0.3.4     2023-08-21 [1] RSPM (R 4.3.0)
 gtools           3.9.4     2022-11-27 [1] RSPM (R 4.3.0)
 hms              1.1.3     2023-03-21 [1] RSPM (R 4.3.0)
 htmltools        0.5.6.1   2023-10-06 [1] RSPM (R 4.3.0)
 htmlwidgets      1.6.2     2023-03-17 [1] RSPM (R 4.3.0)
 httpuv           1.6.12    2023-10-23 [1] RSPM (R 4.3.0)
 igraph           1.5.1     2023-08-10 [1] RSPM (R 4.3.0)
 inline           0.3.19    2021-05-31 [1] RSPM (R 4.3.0)
 jsonlite         1.8.7     2023-06-29 [1] RSPM (R 4.3.0)
 knitr            1.44      2023-09-11 [1] RSPM (R 4.3.0)
 labeling         0.4.3     2023-08-29 [1] RSPM (R 4.3.0)
 later            1.3.1     2023-05-02 [1] RSPM (R 4.3.0)
 lattice          0.21-8    2023-04-05 [2] CRAN (R 4.3.1)
 lifecycle        1.0.3     2022-10-07 [1] RSPM (R 4.3.0)
 listenv          0.9.0     2022-12-16 [1] RSPM (R 4.3.0)
 loo              2.6.0     2023-03-31 [1] RSPM (R 4.3.0)
 lubridate      * 1.9.3     2023-09-27 [1] RSPM (R 4.3.0)
 magrittr       * 2.0.3     2022-03-30 [1] RSPM (R 4.3.0)
 markdown         1.11      2023-10-19 [1] RSPM (R 4.3.0)
 Matrix           1.5-4.1   2023-05-18 [2] CRAN (R 4.3.1)
 matrixStats      1.0.0     2023-06-02 [1] RSPM (R 4.3.0)
 memoise          2.0.1     2021-11-26 [1] RSPM (R 4.3.0)
 mime             0.12      2021-09-28 [1] RSPM (R 4.3.0)
 miniUI           0.1.1.1   2018-05-18 [1] RSPM (R 4.3.0)
 munsell          0.5.0     2018-06-12 [1] RSPM (R 4.3.0)
 mvtnorm          1.2-3     2023-08-25 [1] RSPM (R 4.3.0)
 nlme             3.1-162   2023-01-31 [2] CRAN (R 4.3.1)
 parallelly       1.36.0    2023-05-26 [1] RSPM (R 4.3.0)
 pillar           1.9.0     2023-03-22 [1] RSPM (R 4.3.0)
 pkgbuild         1.4.2     2023-06-26 [1] RSPM (R 4.3.0)
 pkgconfig        2.0.3     2019-09-22 [1] RSPM (R 4.3.0)
 plyr             1.8.9     2023-10-02 [1] RSPM (R 4.3.0)
 posterior      * 1.4.1     2023-03-14 [1] RSPM (R 4.3.0)
 prettyunits      1.2.0     2023-09-24 [1] RSPM (R 4.3.0)
 processx         3.8.2     2023-06-30 [1] RSPM (R 4.3.0)
 promises         1.2.1     2023-08-10 [1] RSPM (R 4.3.0)
 ps               1.7.5     2023-04-18 [1] RSPM (R 4.3.0)
 purrr          * 1.0.2     2023-08-10 [1] RSPM (R 4.3.0)
 quadprog         1.5-8     2019-11-20 [1] RSPM (R 4.3.0)
 QuickJSR         1.0.7     2023-10-15 [1] RSPM (R 4.3.0)
 R6               2.5.1     2021-08-19 [1] RSPM (R 4.3.0)
 Rcpp           * 1.0.11    2023-07-06 [1] RSPM (R 4.3.0)
 RcppParallel     5.1.7     2023-02-27 [1] RSPM (R 4.3.0)
 readr          * 2.1.4     2023-02-10 [1] RSPM (R 4.3.0)
 reshape2         1.4.4     2020-04-09 [1] RSPM (R 4.3.0)
 rlang          * 1.1.1     2023-04-28 [1] RSPM (R 4.3.0)
 rmarkdown        2.25      2023-09-18 [1] RSPM (R 4.3.0)
 rstan            2.32.3    2023-10-15 [1] RSPM (R 4.3.0)
 rstantools       2.3.1.1   2023-07-18 [1] RSPM (R 4.3.0)
 rstudioapi       0.15.0    2023-07-07 [1] RSPM (R 4.3.0)
 scales         * 1.2.1     2022-08-20 [1] RSPM (R 4.3.0)
 sessioninfo      1.2.2     2021-12-06 [1] RSPM (R 4.3.0)
 shiny            1.7.5.1   2023-10-14 [1] RSPM (R 4.3.0)
 shinyjs          2.1.0     2021-12-23 [1] RSPM (R 4.3.0)
 shinystan        2.6.0     2022-03-03 [1] RSPM (R 4.3.0)
 shinythemes      1.2.0     2021-01-25 [1] RSPM (R 4.3.0)
 StanHeaders      2.26.28   2023-09-07 [1] RSPM (R 4.3.0)
 stringi          1.7.12    2023-01-11 [1] RSPM (R 4.3.0)
 stringr        * 1.5.0     2022-12-02 [1] RSPM (R 4.3.0)
 svUnit           1.0.6     2021-04-19 [1] RSPM (R 4.3.0)
 tensorA          0.36.2    2020-11-19 [1] RSPM (R 4.3.0)
 threejs          0.3.3     2020-01-21 [1] RSPM (R 4.3.0)
 tibble         * 3.2.1     2023-03-20 [1] RSPM (R 4.3.0)
 tidybayes      * 3.0.6     2023-08-12 [1] RSPM (R 4.3.0)
 tidyr          * 1.3.0     2023-01-24 [1] RSPM (R 4.3.0)
 tidyselect       1.2.0     2022-10-10 [1] RSPM (R 4.3.0)
 tidyverse      * 2.0.0     2023-02-22 [1] RSPM (R 4.3.0)
 timechange       0.2.0     2023-01-11 [1] RSPM (R 4.3.0)
 tzdb             0.4.0     2023-05-12 [1] RSPM (R 4.3.0)
 utf8             1.2.4     2023-10-22 [1] RSPM (R 4.3.0)
 V8               4.4.0     2023-10-09 [1] RSPM (R 4.3.0)
 vctrs            0.6.4     2023-10-12 [1] RSPM (R 4.3.0)
 vroom            1.6.4     2023-10-02 [1] RSPM (R 4.3.0)
 withr            2.5.1     2023-09-26 [1] RSPM (R 4.3.0)
 xfun             0.40      2023-08-09 [1] RSPM (R 4.3.0)
 xtable           1.8-4     2019-04-21 [1] RSPM (R 4.3.0)
 xts              0.13.1    2023-04-16 [1] RSPM (R 4.3.0)
 yaml             2.3.7     2023-01-23 [1] RSPM (R 4.3.0)
 zoo              1.8-12    2023-04-13 [1] RSPM (R 4.3.0)

 [1] /usr/local/lib/R/site-library
 [2] /usr/local/lib/R/library

──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Code
options(width = 80L)